############### This code replicates analysis in the Appendix of "Negative Sentiment and Congressional Cue-taking on Social Media" ###############


##### Read in packages 
library(plm)
library(lmtest)
library(multiwayvcov)
library(plyr)
library(stringr)
library(base64enc)
library(stargazer)
library(lubridate)
library(readr)
library(dplyr)
library(ggplot2)


##### Set the working directory to the folder that holds this code and the data
# Fill in `data_dir` with that folder's path. When it is left empty the current
# working directory is kept as-is; calling setwd("") directly would stop the
# script with "cannot change working directory" before any data is read.
data_dir <- ""
if (nzchar(data_dir)) {
  setwd(data_dir)
}


############### Read in data from 116th Congress ###############

# All replication files are CSVs read with strings kept as character vectors.
read_replication_csv <- function(path) {
  read.csv(file = path, stringsAsFactors = FALSE)
}

## Post-level dataframe (without CrowdTangle data)
cong16 <- read_replication_csv("Cand_Fb_Posts_Replication_Cong16_Multimedia.csv")

## Post-level dataframe aggregated by candidate-week-multimedia
posts_df2 <- read_replication_csv("Cand_Fb_Posts_Cleaned_Cong16_AGG_CandWeek_Multimedia.csv")

## Post-level dataframe aggregated by candidate-week
posts_week2 <- read_replication_csv("Cand_Fb_Posts_Cleaned_Cong16_AGG_CandWeek_Overall.csv")


############### Analysis ###############
# Print numbers with three significant digits from here on.
options(digits = 3)

##### Plot Facebook posts over time for Appendix

### Prep dataframe of relevant variables
# Keep only the columns the weekly-count figures (A1-A4) group by.
cong16_week_all <- cong16 %>%
  dplyr::select(fb_user, female, party, week, challenger, competitive)


### Appendix Figure A1: By Gender
# One row per (week, female) pair with the number of posts in that pair.
# The 0/1 indicator is recoded to display labels in a single vectorized step
# (no row-by-row loop), so missing values become NA instead of stopping the
# script, and any value other than 0/1 is passed through unchanged as text.
cong16_week_gender <- cong16_week_all %>%
  dplyr::count(week, female, name = "week_posts") %>%
  dplyr::mutate(
    week = as_date(week),
    Gender = dplyr::case_when(
      female == 1 ~ "Female",
      female == 0 ~ "Male",
      TRUE ~ as.character(female)
    )
  ) %>%
  dplyr::select(week, Gender, week_posts)

# Weekly post counts, one grey-scale line per gender.
g <- ggplot(cong16_week_gender, aes(x = week, y = week_posts, colour = Gender)) +
  geom_line() +
  scale_x_date(date_labels = "%b %Y") +
  xlab("") +
  ylab("Number of Weekly Posts") +
  theme_classic() +
  scale_colour_grey()
g
# Pass the plot explicitly so the saved file never depends on which plot
# happened to be created last.
ggsave("weeklyposts_gender.pdf", plot = g)


### Appendix Figure A2: By Incumbency
# One row per (week, challenger) pair with the number of posts in that pair.
# The status strings are relabelled in a single vectorized step (no
# row-by-row loop), so missing values become NA instead of stopping the
# script; any other value is passed through unchanged as text.
cong16_week_incum <- cong16_week_all %>%
  dplyr::count(week, challenger, name = "week_posts") %>%
  dplyr::mutate(
    week = as_date(week),
    Incumbency = dplyr::case_when(
      challenger == "challenger" ~ "Non-Incumbent",
      challenger == "incumbent" ~ "Incumbent",
      TRUE ~ as.character(challenger)
    )
  ) %>%
  dplyr::select(week, Incumbency, week_posts)

# Weekly post counts, one grey-scale line per incumbency status.
inc <- ggplot(cong16_week_incum, aes(x = week, y = week_posts, colour = Incumbency)) +
  geom_line() +
  scale_x_date(date_labels = "%b %Y") +
  xlab("") +
  ylab("Number of Weekly Posts") +
  theme_classic() +
  scale_colour_grey()
inc
# Pass the plot explicitly so the saved file never depends on which plot
# happened to be created last.
ggsave("weeklyposts_incum.pdf", plot = inc)


### Appendix Figure A3: By Party
# One row per (week, party) pair with the number of posts in that pair.
cong16_week_party <- cong16_week_all %>%
  dplyr::count(week, party, name = "week_posts") %>%
  dplyr::mutate(week = as_date(week)) %>%
  dplyr::rename(Party = party)

# Weekly post counts, one grey-scale line per party.
par <- ggplot(cong16_week_party, aes(x = week, y = week_posts, colour = Party)) +
  geom_line() +
  scale_x_date(date_labels = "%b %Y") +
  xlab("") +
  ylab("Number of Weekly Posts") +
  theme_classic() +
  scale_colour_grey()
par
# Pass the plot explicitly so the saved file never depends on which plot
# happened to be created last.
ggsave("weeklyposts_party.pdf", plot = par)


### Appendix Figure A4: By District Competitiveness
# One row per (week, competitive) pair with the number of posts in that pair.
# The 0/1 indicator is recoded to display labels in a single vectorized step
# (no row-by-row loop), so missing values become NA instead of stopping the
# script, and any value other than 0/1 is passed through unchanged as text.
cong16_week_comp <- cong16_week_all %>%
  dplyr::count(week, competitive, name = "week_posts") %>%
  dplyr::mutate(
    week = as_date(week),
    District = dplyr::case_when(
      competitive == 1 ~ "Competitive",
      competitive == 0 ~ "Safe",
      TRUE ~ as.character(competitive)
    )
  ) %>%
  dplyr::select(week, District, week_posts)

# Weekly post counts, one grey-scale line per district type.
comp <- ggplot(cong16_week_comp, aes(x = week, y = week_posts, colour = District)) +
  geom_line() +
  scale_x_date(date_labels = "%b %Y") +
  xlab("") +
  ylab("Number of Weekly Posts") +
  theme_classic() +
  scale_colour_grey()
comp
# Pass the plot explicitly so the saved file never depends on which plot
# happened to be created last.
ggsave("weeklyposts_comp.pdf", plot = comp)


####### Regression Analysis 

##### Define OLS Functions
#' OLS of a logged outcome on one predictor plus state and week terms
#'
#' The formula is written literally so the coefficient for the predictor is
#' always named `IV` in the fitted model. `x` and `IV` are taken from the
#' function arguments unless `data` itself has columns with those names;
#' `state` and `week` are looked up in `data`.
#'
#' @param x Numeric outcome vector; it enters the model as log(x + 1).
#' @param IV Predictor vector of the same length as `x`.
#' @param data Data frame expected to contain `state` and `week`.
#' @return The fitted `lm` object.
ols_cand_wk_nocontrols <- function(x, IV, data) {
  fit <- lm(
    formula = log(x + 1) ~ IV + state + week,
    data = data
  )
  fit
}

#' OLS of a logged outcome on one predictor with controls, state and week
#'
#' The formula is written literally so the coefficient for the predictor is
#' always named `IV` in the fitted model. `x` and `IV` are taken from the
#' function arguments unless `data` itself has columns with those names; all
#' other terms are looked up in `data`.
#'
#' @param x Numeric outcome vector; it enters the model as log(x + 1).
#' @param IV Predictor vector of the same length as `x`.
#' @param data Data frame expected to contain `female`, `party`, `challenger`,
#'   `competitive`, `white`, `num_posts`, `avg_likes`, `multimedia`, `state`
#'   and `week`.
#' @return The fitted `lm` object.
ols_cand_wk_controls <- function(x, IV, data) {
  fit <- lm(
    formula = log(x + 1) ~ IV + female + party + challenger +
      competitive + white + log(num_posts + 1) + log(avg_likes + 1) +
      multimedia + state + week,
    data = data
  )
  fit
}


####### Table A1
# Every Table A1 model regresses one logged engagement outcome from
# posts_week2 on posts_week2$prop_neg using the no-controls specification.
fit_a1 <- function(outcome) {
  ols_cand_wk_nocontrols(
    x = outcome,
    IV = posts_week2$prop_neg,
    data = posts_week2
  )
}

### H1: likes, comments, shares
C1 <- fit_a1(posts_week2$avg_FBlikes)
C3 <- fit_a1(posts_week2$avg_comment)
C5 <- fit_a1(posts_week2$avg_shares)

### H2: sad and angry reactions
C7 <- fit_a1(posts_week2$avg_FBsad)
C9 <- fit_a1(posts_week2$avg_FBangry)

### Appendix Table A1
stargazer(
  C1, C3, C5, C7, C9,
  title = "OLS regressions of proportion of average negative words per post on candidates' logged average post likes, comments, shares, sad, and angry reactions received (by candidate-week)",
  dep.var.labels = "",
  column.labels = c(
    "Avg. Post Likes",
    "Avg. Comments",
    "Avg. Shares",
    "Avg. Sad Reactions",
    "Avg. Angry Reactions"
  ),
  covariate.labels = c("Proportion Negative"),
  omit = c("state", "week"),
  notes.align = "r",
  font.size = "small",
  omit.stat = c("ser", "adj.rsq"),
  notes = "Models include week and state fixed effects.",
  column.sep.width = "2pt"
)


########## Robustness Check: Within-Candidate Models (Tables A2 & A3)

# Adds the sad and angry reaction averages expressed as a share of
# avg_Interactions; the same two columns are added to both aggregated frames.
add_reaction_shares <- function(df) {
  df$avgsad_allinter <- df$avg_FBsad / df$avg_Interactions
  df$avgangry_allinter <- df$avg_FBangry / df$avg_Interactions
  df
}

posts_df2 <- add_reaction_shares(posts_df2)
posts_week2 <- add_reaction_shares(posts_week2)

##### Create candidate-post type df
# Step 1: attach, to every row, the mean of each measure within its
# (fb_user, multimedia) group.
cand_df_type <- posts_df2 %>%
  group_by(fb_user, multimedia) %>%
  dplyr::mutate(
    cand_prop_neg = mean(prop_neg),
    cand_prop_pos = mean(prop_pos),
    cand_avg_FBlikes = mean(avg_FBlikes),
    cand_avg_comment = mean(avg_comment),
    cand_avg_shares = mean(avg_shares),
    cand_avg_FBsad = mean(avg_FBsad),
    cand_avg_FBangry = mean(avg_FBangry),
    cand_avgsad_allinter = mean(avgsad_allinter),
    cand_avgangry_allinter = mean(avgangry_allinter)
  ) %>%
  ungroup()

# Step 2: keep the group means plus the candidate-level covariates and drop
# exact duplicate rows.
cand_df2_type <- unique(
  dplyr::select(
    cand_df_type,
    fb_user,
    cand_prop_neg, cand_prop_pos,
    cand_avg_FBlikes, cand_avg_comment, cand_avg_shares,
    cand_avg_FBsad, cand_avg_FBangry,
    cand_avgsad_allinter, cand_avgangry_allinter,
    multimedia, District, state, competitive, female, party,
    challenger, white, avg_likes, num_posts
  )
)

# Step 3: within each (fb_user, multimedia) group keep the first row for each
# distinct cand_prop_neg value.
cand_df3_type <- cand_df2_type %>%
  group_by(fb_user, multimedia) %>%
  distinct(cand_prop_neg, .keep_all = TRUE)


##### Create candidate df
# Step 1: attach, to every row, the mean of each measure within its fb_user
# group.
cand_df <- posts_week2 %>%
  group_by(fb_user) %>%
  dplyr::mutate(
    cand_prop_neg = mean(prop_neg),
    cand_prop_pos = mean(prop_pos),
    cand_avg_FBlikes = mean(avg_FBlikes),
    cand_avg_comment = mean(avg_comment),
    cand_avg_shares = mean(avg_shares),
    cand_avg_FBsad = mean(avg_FBsad),
    cand_avg_FBangry = mean(avg_FBangry),
    cand_avgsad_allinter = mean(avgsad_allinter),
    cand_avgangry_allinter = mean(avgangry_allinter)
  ) %>%
  ungroup()

# Step 2: keep the group means plus the candidate-level covariates and drop
# exact duplicate rows.
cand_df2 <- unique(
  dplyr::select(
    cand_df,
    fb_user,
    cand_prop_neg, cand_prop_pos,
    cand_avg_FBlikes, cand_avg_comment, cand_avg_shares,
    cand_avg_FBsad, cand_avg_FBangry,
    cand_avgsad_allinter, cand_avgangry_allinter,
    District, state, competitive, female, party,
    challenger, white, avg_likes, num_posts
  )
)

# Step 3: within each fb_user group keep the first row for each distinct
# cand_prop_neg value.
cand_df3 <- cand_df2 %>%
  group_by(fb_user) %>%
  distinct(cand_prop_neg, .keep_all = TRUE) # 696 rows


##### Define OLS Function

#' OLS of a logged outcome on one predictor with controls and state terms
#'
#' Same specification as the candidate-week model with controls, but without
#' a `week` term. The formula is written literally so the coefficient for the
#' predictor is always named `IV`. `x` and `IV` are taken from the function
#' arguments unless `data` itself has columns with those names; all other
#' terms are looked up in `data`.
#'
#' @param x Numeric outcome vector; it enters the model as log(x + 1).
#' @param IV Predictor vector of the same length as `x`.
#' @param data Data frame expected to contain `female`, `party`, `challenger`,
#'   `competitive`, `white`, `num_posts`, `avg_likes`, `multimedia` and
#'   `state`.
#' @return The fitted `lm` object.
ols_cand_controls <- function(x, IV, data) {
  fit <- lm(
    formula = log(x + 1) ~ IV + female + party + challenger +
      competitive + white + log(num_posts + 1) + log(avg_likes + 1) +
      multimedia + state,
    data = data
  )
  fit
} # for multimedia: status = omitted category


##### Robustness Check: Proportion negative by candidate (Table A2)
# All five models use cand_df3_type$cand_prop_neg as the predictor, matching
# the "Proportion Negative" row label and the table title below. The sad and
# angry models must not use cand_prop_pos here: that would make them identical
# to the corresponding models of the proportion-positive table (Table A3).

### H1
C2cand <- ols_cand_controls(x = cand_df3_type$cand_avg_FBlikes, IV = cand_df3_type$cand_prop_neg, data = cand_df3_type)
C4cand <- ols_cand_controls(x = cand_df3_type$cand_avg_comment, IV = cand_df3_type$cand_prop_neg, data = cand_df3_type)
C6cand <- ols_cand_controls(x = cand_df3_type$cand_avg_shares, IV = cand_df3_type$cand_prop_neg, data = cand_df3_type)

### H2
C8cand <- ols_cand_controls(x = cand_df3_type$cand_avg_FBsad, IV = cand_df3_type$cand_prop_neg, data = cand_df3_type)
C10cand <- ols_cand_controls(x = cand_df3_type$cand_avg_FBangry, IV = cand_df3_type$cand_prop_neg, data = cand_df3_type)


### Appendix Table A2
# State terms are fitted but omitted from the printed table.
stargazer(C2cand, C4cand, C6cand, C8cand, C10cand,
          title="OLS regressions of proportion of average negative words per post on candidates' logged average post likes, comments, shares, sad, and angry reactions received (by candidate), with controls.",
          dep.var.labels = "",
          column.labels = c("Avg. Post Likes",
                            "Avg. Comments", "Avg. Shares", "Avg. Sad Reactions",
                            "Avg. Angry Reactions"),
          covariate.labels=c("Proportion Negative", "Female", "Republican", "Incumbent",
                             "Competitive", "White",
                             "Overall N Posts",
                             "Overall Avg. Page Likes", "Multimedia"),
          omit = c("state"),
          notes.align= "r",
          font.size = "small",
          omit.stat = c("ser", "adj.rsq"),
          notes = "Models include state fixed effects.",
          column.sep.width = "2pt")


##### Robustness check: Proportion positive by candidate (Table A3)
# Every Table A3 model regresses one logged outcome from cand_df3_type on
# cand_df3_type$cand_prop_pos using the candidate-level specification.
fit_a3 <- function(outcome) {
  ols_cand_controls(
    x = outcome,
    IV = cand_df3_type$cand_prop_pos,
    data = cand_df3_type
  )
}

### H1: likes, comments, shares
C2candP <- fit_a3(cand_df3_type$cand_avg_FBlikes)
C4candP <- fit_a3(cand_df3_type$cand_avg_comment)
C6candP <- fit_a3(cand_df3_type$cand_avg_shares)

### H2: sad and angry reactions
C8candP <- fit_a3(cand_df3_type$cand_avg_FBsad)
C10candP <- fit_a3(cand_df3_type$cand_avg_FBangry)

### Appendix Table A3
stargazer(
  C2candP, C4candP, C6candP, C8candP, C10candP,
  title = "OLS regressions of proportion of average positive words per post on candidates' logged average post likes, comments, shares, sad, and angry reactions received (by candidate), with controls.",
  dep.var.labels = "",
  column.labels = c(
    "Avg. Post Likes",
    "Avg. Comments",
    "Avg. Shares",
    "Avg. Sad Reactions",
    "Avg. Angry Reactions"
  ),
  covariate.labels = c(
    "Proportion Positive", "Female", "Republican", "Incumbent",
    "Competitive", "White",
    "Overall N Posts",
    "Overall Avg. Page Likes", "Multimedia"
  ),
  omit = c("state"),
  notes.align = "r",
  font.size = "small",
  omit.stat = c("ser", "adj.rsq"),
  notes = "Models include state fixed effects.",
  column.sep.width = "2pt"
)


####### Robustness Check: Using proportion of sad & angry reactions out of all reactions (Table A4)

# Sad and angry reaction averages as a share of avg_Interactions. Recomputing
# them here is harmless and lets this section run on freshly loaded data.
posts_df2$avgsad_allinter <- posts_df2$avg_FBsad/posts_df2$avg_Interactions
posts_df2$avgangry_allinter <- posts_df2$avg_FBangry/posts_df2$avg_Interactions

posts_week2$avgsad_allinter <- posts_week2$avg_FBsad/posts_week2$avg_Interactions
posts_week2$avgangry_allinter <- posts_week2$avg_FBangry/posts_week2$avg_Interactions

### Testing H2
# Models without controls use posts_week2; models with controls use posts_df2,
# which carries the multimedia column.
C7sad <- ols_cand_wk_nocontrols(x = posts_week2$avgsad_allinter, IV = posts_week2$prop_neg, data = posts_week2)
C8sad <- ols_cand_wk_controls(x = posts_df2$avgsad_allinter, IV = posts_df2$prop_neg, data = posts_df2)
C9ang <- ols_cand_wk_nocontrols(x = posts_week2$avgangry_allinter, IV = posts_week2$prop_neg, data = posts_week2)
C10ang <- ols_cand_wk_controls(x = posts_df2$avgangry_allinter, IV = posts_df2$prop_neg, data = posts_df2)



### Appendix Table A4
# The with-controls models include a multimedia term, so it gets a
# "Multimedia" label like in the other tables with controls; without it the
# row would be printed under its raw coefficient name.
stargazer(C7sad, C8sad, C9ang, C10ang,
          title="OLS regressions of proportion of average negative words per post on candidates' logged average proportion of average sad and anger reactions, respectively, out of all interactions received (by candidate-week), with and without controls.",
          dep.var.labels = "",
          column.labels = c("Sad/All Interactions", "Sad/All Interactions",
                            "Anger/All Interactions", "Anger/All Interactions"),
          covariate.labels=c("Proportion Negative", "Female", "Republican", "Incumbent",
                             "Competitive", "White",
                             "Overall N Posts",
                             "Overall Avg. Page Likes", "Multimedia"),
          omit = c("state", "week"),
          notes.align= "r",
          font.size = "small",
          omit.stat = c("ser", "adj.rsq"),
          notes = "Models include week and state fixed effects.",
          column.sep.width = "2pt")


##### Robustness Check: Different for proportion positive words? (Table A5)
# Every Table A5 model regresses one logged outcome from posts_df2 on
# posts_df2$prop_pos using the with-controls specification.
fit_a5 <- function(outcome) {
  ols_cand_wk_controls(
    x = outcome,
    IV = posts_df2$prop_pos,
    data = posts_df2
  )
}

### H1: POSITIVE
C2P <- fit_a5(posts_df2$avg_FBlikes)
C4P <- fit_a5(posts_df2$avg_comment)
C6P <- fit_a5(posts_df2$avg_shares)

### H2: POSITIVE
C8P <- fit_a5(posts_df2$avg_FBsad)
C10P <- fit_a5(posts_df2$avg_FBangry)

### Appendix Table A5
stargazer(
  C2P, C4P, C6P, C8P, C10P,
  title = "OLS regressions of proportion of average positive words per post on candidates' logged average post likes, comments, shares, sad, and angry reactions received (by candidate-post type-week), with controls",
  dep.var.labels = "",
  column.labels = c(
    "Avg. Post Likes",
    "Avg. Comments",
    "Avg. Shares",
    "Avg. Sad Reactions",
    "Avg. Angry Reactions"
  ),
  covariate.labels = c(
    "Proportion Positive", "Female", "Republican", "Incumbent",
    "Competitive", "White",
    "Overall N Posts",
    "Overall Avg. Page Likes", "Multimedia"
  ),
  omit = c("state", "week"),
  notes.align = "r",
  font.size = "small",
  omit.stat = c("ser", "adj.rsq"),
  notes = "Models include week and state fixed effects.",
  column.sep.width = "2pt"
)


##### Robustness Check: Love, Care, Wow, and Haha Reactions (Negative Sentiment) (Table A6)
# Every Table A6 model regresses one logged reaction average from posts_df2
# on posts_df2$prop_neg using the with-controls specification.
fit_a6 <- function(outcome) {
  ols_cand_wk_controls(
    x = outcome,
    IV = posts_df2$prop_neg,
    data = posts_df2
  )
}

### H2:
C8love <- fit_a6(posts_df2$avg_FBlove)
C10care <- fit_a6(posts_df2$avg_FBcare)
C12wow <- fit_a6(posts_df2$avg_FBwow)
C14haha <- fit_a6(posts_df2$avg_FBhaha)

### Appendix Table A6
stargazer(
  C8love, C10care, C12wow, C14haha,
  title = "OLS regressions of proportion of average negative words per post on candidates' logged average proportion of average love, care, wow, and haha reactions, respectively, (by candidate-week) with controls.",
  dep.var.labels = "",
  column.labels = c(
    "Avg. Love Reactions",
    "Avg. Care Reactions",
    "Avg. Wow Reactions",
    "Avg. Haha Reactions"
  ),
  covariate.labels = c(
    "Proportion Negative", "Female", "Republican", "Incumbent",
    "Competitive", "White",
    "Overall N Posts",
    "Overall Avg. Page Likes", "Multimedia"
  ),
  omit = c("state", "week"),
  notes.align = "r",
  font.size = "small",
  omit.stat = c("ser", "adj.rsq"),
  notes = "Models include week and state fixed effects.",
  column.sep.width = "2pt"
)



##### Robustness Check: Love, Care, Wow, and Haha Reactions (Positive Sentiment) (Table A7)
# Every Table A7 model regresses one logged reaction average from posts_df2
# on posts_df2$prop_pos using the with-controls specification.
fit_a7 <- function(outcome) {
  ols_cand_wk_controls(
    x = outcome,
    IV = posts_df2$prop_pos,
    data = posts_df2
  )
}

### H2:
C8love_pos <- fit_a7(posts_df2$avg_FBlove)
C10care_pos <- fit_a7(posts_df2$avg_FBcare)
C12wow_pos <- fit_a7(posts_df2$avg_FBwow)
C14haha_pos <- fit_a7(posts_df2$avg_FBhaha)

### Appendix Table A7
stargazer(
  C8love_pos, C10care_pos, C12wow_pos, C14haha_pos,
  title = "OLS regressions of proportion of average positive words per post on candidates' logged average proportion of average love, care, wow, and haha reactions, respectively, (by candidate-week) with controls.",
  dep.var.labels = "",
  column.labels = c(
    "Avg. Love Reactions",
    "Avg. Care Reactions",
    "Avg. Wow Reactions",
    "Avg. Haha Reactions"
  ),
  covariate.labels = c(
    "Proportion Positive", "Female", "Republican", "Incumbent",
    "Competitive", "White",
    "Overall N Posts",
    "Overall Avg. Page Likes", "Multimedia"
  ),
  omit = c("state", "week"),
  notes.align = "r",
  font.size = "small",
  omit.stat = c("ser", "adj.rsq"),
  notes = "Models include week and state fixed effects.",
  column.sep.width = "2pt"
)
